##Processing
# To import and clean data.
# impclean() is for the total imports / tariff revenue sheets and works at the
# hs08 level only: no country breakdown is kept in the output.
#
# Arguments:
#   path          Path to the Excel workbook.
#   sheet_name    Name of the sheet to read.
#   values_to_new Name (a string) given to the value column; it is also the
#                 prefix of the per-year output columns,
#                 e.g. "cifval" -> cifval_<year>.
#   keep_months   Month names to keep in the result. Defaults to January
#                 through August.
#
# Returns a data frame with the columns hs08 (character), month, and one
# value column per year named "<values_to_new>_<year>".
impclean <- function(path, sheet_name, values_to_new,
                     keep_months = month.name[1:8]) {
  # skip = 2 drops the first two rows of the sheet (presumably title rows
  # above the header -- confirm against the workbook).
  df <- read_excel(path, sheet = sheet_name, skip = 2)
  # Rename columns 2 and 4 to the names the pipeline below relies on.
  colnames(df)[c(2, 4)] <- c("hs08", "year")

  # The pipeline result is the function's value. all_of() makes it explicit
  # that values_to_new holds a column name rather than being a column itself.
  df %>%
    filter(!is.na(.data[["hs08"]])) %>%
    pivot_longer(
      cols = January:December,
      names_to = "month",
      values_to = values_to_new
    ) %>%
    select(hs08, year, month, all_of(values_to_new)) %>%
    pivot_wider(
      names_from = year,
      values_from = all_of(values_to_new),
      names_prefix = paste0(values_to_new, "_")
    ) %>%
    mutate(hs08 = as.character(hs08)) %>%
    filter(month %in% keep_months)
}

# By product only: both tables are read from the same workbook (path_to_IM),
# one sheet each.
cifval <- impclean(
  path = path_to_IM,
  sheet_name = "CIF Import Value",
  values_to_new = "cifval"
)
cald <- impclean(
  path = path_to_IM,
  sheet_name = "Calculated Duties",
  values_to_new = "cald"
)



# by country
# "Calculated Duties" and "CIF Import Value" Excel files do NOT have a consistent
# column order for country, hs08, and year 
# so the impclean2() function requires inputs of the column indices 
# for country, hs08, and year:

# Import and clean one sheet of monthly data at the country x hs08 level.
#
# Arguments:
#   path          Path to the Excel workbook.
#   sheet_name    Name of the sheet to read.
#   values_to_new Name (a string) given to the value column; it is also the
#                 prefix of the per-year output columns,
#                 e.g. "cald" -> cald_<year>.
#   col_country   Index of the country column in the sheet.
#   col_hs08      Index of the hs08 column in the sheet.
#   col_year      Index of the year column in the sheet.
#   keep_months   Month names to keep in the result. Defaults to April
#                 through August.
#
# Returns a data frame with the columns country, hs08 (character), month, and
# one value column per year named "<values_to_new>_<year>".
impclean2 <- function(path, sheet_name, values_to_new,
                      col_country, col_hs08, col_year,
                      keep_months = month.name[4:8]) {
  # skip = 2 drops the first two rows of the sheet (presumably title rows
  # above the header -- confirm against the workbook).
  df <- read_excel(path, sheet = sheet_name, skip = 2)
  # Rename the caller-supplied column positions to the names used below.
  colnames(df)[c(col_country, col_hs08, col_year)] <-
    c("country", "hs08", "year")

  # The pipeline result is the function's value. all_of() makes it explicit
  # that values_to_new holds a column name rather than being a column itself.
  df %>%
    filter(!is.na(.data[["hs08"]])) %>%
    pivot_longer(
      cols = January:December,
      names_to = "month",
      values_to = values_to_new
    ) %>%
    select(country, hs08, year, month, all_of(values_to_new)) %>%
    pivot_wider(
      names_from = year,
      values_from = all_of(values_to_new),
      names_prefix = paste0(values_to_new, "_")
    ) %>%
    mutate(hs08 = as.character(hs08)) %>%
    filter(month %in% keep_months)
}
# The column positions are passed by name because they differ between the two
# calls.
cald_country <- impclean2(
  path = path_to_countrycald,
  sheet_name = "Calculated Duties",
  values_to_new = "cald",
  col_country = 5,
  col_hs08 = 2,
  col_year = 4
)
cifval_country <- impclean2(
  path = path_to_countryim,
  sheet_name = "CIF Import Value",
  values_to_new = "cifval",
  col_country = 2,
  col_hs08 = 3,
  col_year = 5
)



# hs08 codes for agricultural/food products that got exemptions.
# Each list file is read into a single column named hs08 (presumably one code
# per line -- confirm against the files). colClasses = "character" stops
# read.table() from converting codes to numbers, which would drop leading or
# trailing zeros.
df1 <- read.table(
  list1,
  sep = "\n",
  col.names = "hs08",
  colClasses = "character"
)
df2 <- read.table(
  list2,
  sep = "\n",
  col.names = "hs08",
  colClasses = "character"
)

# Reuse the tables read above instead of reading both files a second time,
# then strip the dots from the codes.
food_tariffs_hs08 <- bind_rows(df1, df2) %>%
  mutate(hs08 = gsub("\\.", "", hs08))





